from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
from common.utils.mlmodel_util import get_model_info_sklearn, preprocess, get_model_info_auto_sklearn
from common.utils.model_util import load_model
from sklearn import model_selection
from common.utils.format_util import dup_name_handler
from config.auto_sklearn_config import AUTO_SKLEARN_CONFIG
from common.log import log_handler

# Module-level logger obtained from the project's LogHandler.
log = log_handler.LogHandler().get_log()

# auto-sklearn is treated as an optional dependency: a failed import is only
# logged, so this module can still be imported. In that case the name
# AutoSklearnRegressor is left undefined, and train() will fail with a
# NameError at the point where it instantiates the regressor.
try:
    from autosklearn.regression import AutoSklearnRegressor
except ImportError:
    log.error("haven't install auto-sklearn, so you cannot use auto-sklearn-regressor")


def predict(dataframe, para_list, output):
    """Score ``dataframe`` with a previously saved model and append the result.

    Args:
        dataframe: Table to score; it is modified in place by adding the
            prediction column.
        para_list: Parameter dict. Keys read here are ``"model_id"``,
            ``"feature_col"`` (a list) and ``"label_col"``; the whole dict is
            also forwarded to ``preprocess``.
        output: Nested dict; ``output["result"]["output_params"]["output_cols"]``
            is set to the name of the prediction column.

    Returns:
        The same ``dataframe`` object, now carrying the prediction column.
    """
    # Resolve where the model was stored and which features it was trained on.
    model_path, _unused, trained_features = get_model_info_sklearn(para_list["model_id"])
    regressor = load_model(model_path)

    # Build the feature matrix the same way as at training time, then score it.
    features = preprocess(dataframe, para_list, trained_features)
    predictions = regressor.predict(features)

    # Pick the output column name; dup_name_handler is given the feature and
    # label names (presumably to avoid a clash with them — confirm in format_util).
    reserved_names = para_list["feature_col"] + [para_list["label_col"]]
    prediction_col = dup_name_handler("_prediction_", reserved_names)

    output["result"]["output_params"]["output_cols"] = prediction_col
    dataframe[prediction_col] = predictions
    return dataframe


def train(dataframe, para_list, record, output):
    """Fit an auto-sklearn regressor and record its metrics.

    Rows whose label is NaN are dropped before anything else happens. The
    model is fitted on the training part of a train/test split, while the
    metrics and the ``"_prediction_"`` column are computed over all remaining
    rows (training and held-out rows together).

    Args:
        dataframe: pandas DataFrame containing the feature columns and the
            label column. The caller's object is not modified; a filtered
            copy is returned instead.
        para_list: Parameter dict. Keys read here: ``"label_col"``,
            ``"split_rate"`` (its second element is used as the test
            fraction), ``"timeout"``, ``"include"`` and ``"feature_col"``.
            The whole dict is also forwarded to ``preprocess``.
        record: Dict that receives an ``"other_info"`` entry with the feature
            list, model info and metrics on success.
        output: Dict that receives an ``"error"`` entry when
            ``get_model_info_auto_sklearn`` returns a string.

    Returns:
        A ``(model, dataframe)`` tuple. On success the dataframe carries a
        ``"_prediction_"`` column; on the error path it is returned without it.
    """
    # --------------------prepare data------------------
    feature_list = []  # handed to preprocess; its length is used below as the feature count
    label_col = para_list["label_col"]
    y = dataframe[label_col].values

    nonan_index = ~np.isnan(y.astype(float))
    y = y[nonan_index]
    if len(np.unique(y)) == 2:
        # Tiny offset on two-valued labels; presumably keeps the target from
        # being detected as a binary classification target — TODO confirm.
        y = y + 1e-10
    # Explicit copy: a column is assigned into this frame further down, and
    # writing into a filtered selection is a chained-assignment hazard.
    dataframe = dataframe[nonan_index].copy()
    X = preprocess(dataframe, para_list, feature_list)
    X_train, _, y_train, _ = model_selection.train_test_split(
        X, y, test_size=para_list["split_rate"][1], random_state=0)

    # ---------------------model fit---------------------
    # Work on a per-call copy so the shared module-level AUTO_SKLEARN_CONFIG
    # is not polluted with this request's timeout / estimator list.
    config = dict(AUTO_SKLEARN_CONFIG)
    config['time_left_for_this_task'] = para_list['timeout']
    config['include_estimators'] = para_list['include']
    model = AutoSklearnRegressor().set_params(**config)
    model.fit(X_train, y_train)
    model_info = get_model_info_auto_sklearn(model, "regressor")
    if isinstance(model_info, str):
        # A string result is treated as an error message.
        output["error"] = model_info
        return model, dataframe

    # ---------------------get metrics--------------------
    n = len(y)
    p = len(feature_list)

    y_pred = model.predict(X)
    MSE = mean_squared_error(y, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y, y_pred)
    r2 = model.score(X, y)
    # Adjusted R^2 is undefined unless there are more samples than
    # parameters; report NaN instead of dividing by zero or by a negative
    # number of degrees of freedom.
    dof = n - p - 1
    if dof > 0:
        r2adj = 1 - ((1 - r2) * (n - 1)) / dof
    else:
        r2adj = float("nan")

    # ---------------------record info----------------------
    record["other_info"] = {
        "feature_list": feature_list,
        "feature_col": para_list["feature_col"],
        "model_info": model_info,
        "MSE": MSE,
        "MAE": MAE,
        "RMSE": RMSE,
        "r2": r2,
        "r2adj": r2adj,
    }

    output_cols = "_prediction_"
    dataframe[output_cols] = y_pred
    return model, dataframe
